Prior 96w evaluation of TurboCapture-Seq v2 showed low UMIs recovered and high seq saturation. I did low throughput troubleshooting and didn’t see any issues. Process this experiment myself taking care to remove residual liquids from wash steps.
Process Hui Shi of Naik lab Bcor + Flt3 timecourse. Includes a few of my samples.
Samples
SCE object in generate in 1A_generateSCE_reads notebook. The samples were then clustered in the 2B Clustering Wt notebook
Compare the mixed cell culture stimulated with Flt3 for one day. This is a basic contrast that should have a lot of differentially expressed genes.
Workflow is from:
https://bioconductor.org/packages/devel/workflows/vignettes/RNAseq123/inst/doc/limmaWorkflow.html#differential-expression-analysis
This was generated in notebook 2B.
sce <- readRDS(here::here(
"data/TIRE_dendritic_mouse/SCEs/DCs_cluster.sce.rds"))
sce <- sce[,sce$Cell_type == "Total_DC_culture"]
dge <- scran::convertTo(sce, type="edgeR")Have a look at the important metadata.
There are not enough replicates for the preDC contrast to be
meaningful.
Samples are distinct but group along PC2
The samples are treated with FLt3 ligand and harvest after 24 hours.
## Ligand
## Timepoint_Day Flt3L None
## 0 0 6
## 1 3 0
## 5 0 0
## 7 0 0
I added this last to filter genes. About half the genes are filtered
out here.
Doing this reduces the multiple testing burden and fits variation
better.
## [1] 21484 9
keep.exprs <- filterByExpr(dge, group=dge$samples$Timepoint_Day, min.count=1)
dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
dim(dge)## [1] 9550 9
Build the model matrix.
Regress out the effect of the timepoint as this is minor compared to the
cell type subsets.
This means I can explicitly define a contrast matrix to make the comparisons of interest.
## LigandFlt3L LigandNone
## ACAGTAGCTC 0 1
## AGACGCATCG 1 0
## CCGTATGTAG 0 1
## CGTCAATAGT 0 1
## CTTGATCGCG 0 1
## GGAAAGATAC 0 1
Decide on the contrasts. In this case its simply plus or minus Flt3 ligand.
contr.matrix <- makeContrasts(
Ligand = LigandFlt3L - LigandNone,
levels = colnames(sm))
contr.matrix %>%
kable()| Ligand | |
|---|---|
| LigandFlt3L | 1 |
| LigandNone | -1 |
In each contrast, the format is A - B where:
Fit this model using limma voom
par(mfrow=c(1,2))
v <- voom(dge, sm, plot=TRUE)
vfit <- lmFit(v, sm)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit, main="Final model: Mean-variance trend")Check how many genes are differentially expressed
## Ligand
## Down 313
## NotSig 8717
## Up 520
Check what the genes are
ChatGPT explanations:
results <- as_tibble(ligand)
results$ID <- rownames(ligand)
# add a column of NAs
results$DElabel <- "n/s"
# if log2Foldchange > 1 or < -1 and pvalue < 0.05, set as "UP"
results$DElabel[results$logFC > 1 & results$adj.P.Val < 0.1] <- "Flt3"
results$DElabel[results$logFC < -1 & results$adj.P.Val < 0.1] <- "Ctrl"Add gene labels
results$genelabels <- ""
results$genelabels[results$ID == "Fosb"] <- "Fosb"
results$genelabels[results$ID == "Jun"] <- "Jun"
results$genelabels[results$ID == "Chad"] <- "Chad"
results$genelabels[results$ID == "Zfp36"] <- "Zfp36"
results$genelabels[results$ID == "Egr1"] <- "Egr1"
results$genelabels[results$ID == "Ier2"] <- "Ier2"
results$genelabels[results$ID == "Fos"] <- "Fos"
results$genelabels[results$ID == "Nfkbiz"] <- "Nfkbiz"
results$genelabels[results$ID == "Ftl1"] <- "Ftl1"
results$genelabels[results$ID == "Lpl"] <- "Lpl"
results$genelabels[results$ID == "Siglech"] <- "Siglech"
results$genelabels[results$ID == "Myl10"] <- "Myl10"library(ggrepel)
plt3 <- ggplot(data=results, aes(x=logFC, y=-log10(adj.P.Val), colour=DElabel, label=genelabels)) +
geom_point(alpha=0.33, size=1.5) +
geom_text(size=3.5, alpha=1, colour="black",nudge_y=0.5) +
guides(colour = guide_legend(override.aes = list(size=3, alpha=1))) +
scale_color_manual(values = c("darkblue", "grey", "darkorange"), name = "Upregulated") +
geom_vline(xintercept = 1, linetype="dotted") + geom_vline(xintercept = -1, linetype="dotted") +
theme_Publication()
plt3plt2 <- ggplot(data=results, aes(x=AveExpr, y=logFC, colour=DElabel)) +
geom_point(alpha=0.75, size=1.5) +
guides(colour = guide_legend(override.aes = list(size=3, alpha=1))) +
ylab("Log fold chnage") + xlab("Log counts per million") +
scale_color_manual(values = c("darkblue", "grey", "darkorange"), name = "Upregulated") +
theme_Publication()
plt2Need to convert geneIDs from ensembl to enterez
geneids <- as.data.frame(v$genes$ID)
colnames(geneids) <- "ENSEMBL"
geneids$entrez <- mapIds(org.Mm.eg.db, keys = geneids$ENSEMBL, keytype = "ENSEMBL", column = "ENTREZID")load("data/MSigDB/mouse_c2_v5p2.rdata")
idx <- ids2indices(Mm.c2,identifiers = geneids$entrez)
cam.Ligand <- camera(v,idx,sm,contrast=contr.matrix[,1])
head(cam.Ligand,10)par(mfrow=c(1,1))
barcodeplot(efit$t[,1], index=idx$REACTOME_CHOLESTEROL_BIOSYNTHESIS,
index2=idx$NAGASHIMA_EGF_SIGNALING_UP, main="Ligand")(top) NAGASHIMA_EGF_SIGNALING_UP (bottom) REACTOME_CHOLESTEROL_BIOSYNTHESIS
Generate gene set barchart
load("data/MSigDB/mouse_H_v5p2.rdata")
idx <- ids2indices(Mm.H,identifiers = geneids$entrez)
cam.Ligand <- camera(v,idx,sm,contrast=contr.matrix[,1])
head(cam.Ligand,10)The Hallmark genesets are more informative to me.
par(mfrow=c(1,1))
barcodeplot(efit$t[,1], index=idx$HALLMARK_CHOLESTEROL_HOMEOSTASIS,
index2=idx$HALLMARK_TNFA_SIGNALING_VIA_NFKB, main="Ligand")(top)HALLMARK_TNFA_SIGNALING_VIA_NFKB (bottom)HALLMARK_MTORC1_SIGNALING
Generate gene set barchart
The write.fit function can be used to extract and write results for all three comparisons to a single output file.
The results make biological sense for a mixture cell that is exposed to Flt3 for 1 day.
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Red Hat Enterprise Linux 9.4 (Plow)
##
## Matrix products: default
## BLAS: /stornext/System/data/software/rhel/9/base/tools/R/4.4.1/lib64/R/lib/libRblas.so
## LAPACK: /stornext/System/data/software/rhel/9/base/tools/R/4.4.1/lib64/R/lib/libRlapack.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid splines stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] ggrepel_0.9.6 ggthemes_5.1.0
## [3] here_1.0.1 org.Mm.eg.db_3.19.1
## [5] AnnotationDbi_1.66.0 knitr_1.48
## [7] viridis_0.6.5 viridisLite_0.4.2
## [9] patchwork_1.3.0 edgeR_4.2.2
## [11] limma_3.60.6 scran_1.32.0
## [13] scuttle_1.14.0 lubridate_1.9.3
## [15] forcats_1.0.0 stringr_1.5.1
## [17] dplyr_1.1.4 purrr_1.0.2
## [19] readr_2.1.5 tidyr_1.3.1
## [21] tibble_3.2.1 ggplot2_3.5.1
## [23] tidyverse_2.0.0 SingleCellExperiment_1.26.0
## [25] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [27] GenomicRanges_1.56.2 GenomeInfoDb_1.40.1
## [29] IRanges_2.38.1 S4Vectors_0.42.1
## [31] BiocGenerics_0.50.0 MatrixGenerics_1.16.0
## [33] matrixStats_1.4.1
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 gridExtra_2.3
## [3] rlang_1.1.4 magrittr_2.0.3
## [5] scater_1.32.1 compiler_4.4.1
## [7] RSQLite_2.3.7 DelayedMatrixStats_1.26.0
## [9] png_0.1-8 vctrs_0.6.5
## [11] pkgconfig_2.0.3 crayon_1.5.3
## [13] fastmap_1.2.0 XVector_0.44.0
## [15] labeling_0.4.3 utf8_1.2.4
## [17] rmarkdown_2.28 tzdb_0.4.0
## [19] ggbeeswarm_0.7.2 UCSC.utils_1.0.0
## [21] bit_4.5.0 xfun_0.48
## [23] bluster_1.14.0 zlibbioc_1.50.0
## [25] cachem_1.1.0 beachmat_2.20.0
## [27] jsonlite_1.8.9 blob_1.2.4
## [29] highr_0.11 DelayedArray_0.30.1
## [31] BiocParallel_1.38.0 irlba_2.3.5.1
## [33] parallel_4.4.1 cluster_2.1.6
## [35] R6_2.5.1 bslib_0.8.0
## [37] stringi_1.8.4 jquerylib_0.1.4
## [39] Rcpp_1.0.13 Matrix_1.7-0
## [41] igraph_2.0.3 timechange_0.3.0
## [43] tidyselect_1.2.1 rstudioapi_0.17.0
## [45] abind_1.4-8 yaml_2.3.10
## [47] codetools_0.2-20 lattice_0.22-6
## [49] KEGGREST_1.44.1 withr_3.0.1
## [51] evaluate_1.0.1 Biostrings_2.72.1
## [53] pillar_1.9.0 generics_0.1.3
## [55] rprojroot_2.0.4 hms_1.1.3
## [57] sparseMatrixStats_1.16.0 munsell_0.5.1
## [59] scales_1.3.0 glue_1.8.0
## [61] metapod_1.12.0 tools_4.4.1
## [63] BiocNeighbors_1.22.0 ScaledMatrix_1.12.0
## [65] locfit_1.5-9.10 cowplot_1.1.3
## [67] colorspace_2.1-1 GenomeInfoDbData_1.2.12
## [69] beeswarm_0.4.0 BiocSingular_1.20.0
## [71] vipor_0.4.7 cli_3.6.3
## [73] rsvd_1.0.5 fansi_1.0.6
## [75] S4Arrays_1.4.1 gtable_0.3.5
## [77] sass_0.4.9 digest_0.6.37
## [79] SparseArray_1.4.8 dqrng_0.4.1
## [81] farver_2.1.2 memoise_2.0.1
## [83] htmltools_0.5.8.1 lifecycle_1.0.4
## [85] httr_1.4.7 statmod_1.5.0
## [87] bit64_4.5.2